add a `basic_auth` option to the website agent

Albert Sun 11 years ago
parent
commit
7996954a3b
2 changed files with 191 additions and 154 deletions
  1. 7 1
      app/models/agents/website_agent.rb
  2. 184 153
      spec/models/agents/website_agent_spec.rb

+ 7 - 1
app/models/agents/website_agent.rb

@@ -32,6 +32,8 @@ module Agents
32 32
 
33 33
       Note that for all of the formats, whatever you extract MUST have the same number of matches for each extractor.  E.g., if you're extracting rows, all extractors must match all rows.  For generating CSS selectors, something like [SelectorGadget](http://selectorgadget.com) may be helpful.
34 34
 
35
+      The agent can be configured to use HTTP basic auth by setting the `basic_auth` option to a string of the form `username:password`.
36
+
35 37
       Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent.
36 38
     MD
37 39
 
@@ -70,7 +72,11 @@ module Agents
70 72
     def check
71 73
       hydra = Typhoeus::Hydra.new
72 74
       log "Fetching #{options['url']}"
73
-      request = Typhoeus::Request.new(options['url'], :followlocation => true)
75
+      request_opts = {:followlocation => true}
76
+      if !options['basic_auth'].blank?
77
+        request_opts[:userpwd] = options['basic_auth']
78
+      end
79
+      request = Typhoeus::Request.new(options['url'], request_opts)
74 80
       request.on_failure do |response|
75 81
         error "Failed: #{response.inspect}"
76 82
       end

+ 184 - 153
spec/models/agents/website_agent_spec.rb

@@ -1,189 +1,220 @@
1 1
 require 'spec_helper'
2 2
 
3 3
 describe Agents::WebsiteAgent do
4
-  before do
5
-    stub_request(:any, /xkcd/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
6
-    @site = {
7
-      'name' => "XKCD",
8
-      'expected_update_period_in_days' => 2,
9
-      'type' => "html",
10
-      'url' => "http://xkcd.com",
11
-      'mode' => 'on_change',
12
-      'extract' => {
13
-        'url' => {'css' => "#comic img", 'attr' => "src"},
14
-        'title' => {'css' => "#comic img", 'attr' => "title"}
4
+  describe "checking without basic auth" do
5
+    before do
6
+      stub_request(:any, /xkcd/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
7
+      @site = {
8
+        'name' => "XKCD",
9
+        'expected_update_period_in_days' => 2,
10
+        'type' => "html",
11
+        'url' => "http://xkcd.com",
12
+        'mode' => 'on_change',
13
+        'extract' => {
14
+          'url' => {'css' => "#comic img", 'attr' => "src"},
15
+          'title' => {'css' => "#comic img", 'attr' => "title"}
16
+        }
15 17
       }
16
-    }
17
-    @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @site)
18
-    @checker.user = users(:bob)
19
-    @checker.save!
20
-  end
21
-
22
-  describe "#check" do
23
-    it "should check for changes" do
24
-      lambda { @checker.check }.should change { Event.count }.by(1)
25
-      lambda { @checker.check }.should_not change { Event.count }
18
+      @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @site)
19
+      @checker.user = users(:bob)
20
+      @checker.save!
26 21
     end
27 22
 
28
-    it "should always save events when in :all mode" do
29
-      lambda {
30
-        @site['mode'] = 'all'
23
+    describe "#check" do
24
+      it "should check for changes" do
25
+        lambda { @checker.check }.should change { Event.count }.by(1)
26
+        lambda { @checker.check }.should_not change { Event.count }
27
+      end
28
+
29
+      it "should always save events when in :all mode" do
30
+        lambda {
31
+          @site['mode'] = 'all'
32
+          @checker.options = @site
33
+          @checker.check
34
+          @checker.check
35
+        }.should change { Event.count }.by(2)
36
+      end
37
+
38
+      it "should log an error if the number of results for a set of extraction patterns differs" do
39
+        @site['extract']['url']['css'] = "div"
31 40
         @checker.options = @site
32 41
         @checker.check
33
-        @checker.check
34
-      }.should change { Event.count }.by(2)
35
-    end
36
-
37
-    it "should log an error if the number of results for a set of extraction patterns differs" do
38
-      @site['extract']['url']['css'] = "div"
39
-      @checker.options = @site
40
-      @checker.check
41
-      @checker.logs.first.message.should =~ /Got an uneven number of matches/
42
+        @checker.logs.first.message.should =~ /Got an uneven number of matches/
43
+      end
42 44
     end
43
-  end
44 45
 
45
-  describe '#working?' do
46
-    it 'checks if events have been received within the expected receive period' do
47
-      stubbed_time = Time.now
48
-      stub(Time).now { stubbed_time }
46
+    describe '#working?' do
47
+      it 'checks if events have been received within the expected receive period' do
48
+        stubbed_time = Time.now
49
+        stub(Time).now { stubbed_time }
49 50
 
50
-      @checker.should_not be_working # No events created
51
-      @checker.check
52
-      @checker.reload.should be_working # Just created events
53
-
54
-      @checker.error "oh no!"
55
-      @checker.reload.should_not be_working # There is a recent error
51
+        @checker.should_not be_working # No events created
52
+        @checker.check
53
+        @checker.reload.should be_working # Just created events
56 54
 
57
-      stubbed_time = 20.minutes.from_now
58
-      @checker.events.delete_all
59
-      @checker.check
60
-      @checker.reload.should be_working # There is a newer event now
55
+        @checker.error "oh no!"
56
+        @checker.reload.should_not be_working # There is a recent error
61 57
 
62
-      stubbed_time = 2.days.from_now
63
-      @checker.reload.should_not be_working # Two days have passed without a new event having been created
64
-    end
65
-  end
58
+        stubbed_time = 20.minutes.from_now
59
+        @checker.events.delete_all
60
+        @checker.check
61
+        @checker.reload.should be_working # There is a newer event now
66 62
 
67
-  describe "parsing" do
68
-    it "parses CSS" do
69
-      @checker.check
70
-      event = Event.last
71
-      event.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
72
-      event.payload['title'].should =~ /^Biologists play reverse/
63
+        stubbed_time = 2.days.from_now
64
+        @checker.reload.should_not be_working # Two days have passed without a new event having been created
65
+      end
73 66
     end
74 67
 
75
-    it "should turn relative urls to absolute" do
76
-      rel_site = {
77
-        'name' => "XKCD",
78
-        'expected_update_period_in_days' => 2,
79
-        'type' => "html",
80
-        'url' => "http://xkcd.com",
81
-        'mode' => :on_change,
82
-        'extract' => {
83
-          'url' => {'css' => "#topLeft a", 'attr' => "href"},
84
-          'title' => {'css' => "#topLeft a", 'text' => "true"}
85
-        }
86
-      }
87
-      rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
88
-      rel.user = users(:bob)
89
-      rel.save!
90
-      rel.check
91
-      event = Event.last
92
-      event.payload['url'].should == "http://xkcd.com/about"
93
-    end
68
+    describe "parsing" do
69
+      it "parses CSS" do
70
+        @checker.check
71
+        event = Event.last
72
+        event.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
73
+        event.payload['title'].should =~ /^Biologists play reverse/
74
+      end
94 75
 
95
-    describe "JSON" do
96
-      it "works with paths" do
97
-        json = {
98
-          'response' => {
99
-            'version' => 2,
100
-            'title' => "hello!"
101
-          }
102
-        }
103
-        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
104
-        site = {
105
-          'name' => "Some JSON Response",
76
+      it "should turn relative urls to absolute" do
77
+        rel_site = {
78
+          'name' => "XKCD",
106 79
           'expected_update_period_in_days' => 2,
107
-          'type' => "json",
108
-          'url' => "http://json-site.com",
109
-          'mode' => 'on_change',
80
+          'type' => "html",
81
+          'url' => "http://xkcd.com",
82
+          'mode' => :on_change,
110 83
           'extract' => {
111
-            'version' => {'path' => "response.version"},
112
-            'title' => {'path' => "response.title"}
84
+            'url' => {'css' => "#topLeft a", 'attr' => "href"},
85
+            'title' => {'css' => "#topLeft a", 'text' => "true"}
113 86
           }
114 87
         }
115
-        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
116
-        checker.user = users(:bob)
117
-        checker.save!
118
-
119
-        checker.check
88
+        rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
89
+        rel.user = users(:bob)
90
+        rel.save!
91
+        rel.check
120 92
         event = Event.last
121
-        event.payload['version'].should == 2
122
-        event.payload['title'].should == "hello!"
93
+        event.payload['url'].should == "http://xkcd.com/about"
123 94
       end
124 95
 
125
-      it "can handle arrays" do
126
-        json = {
127
-          'response' => {
128
-            'data' => [
129
-              {'title' => "first", 'version' => 2},
130
-              {'title' => "second", 'version' => 2.5}
131
-            ]
96
+      describe "JSON" do
97
+        it "works with paths" do
98
+          json = {
99
+            'response' => {
100
+              'version' => 2,
101
+              'title' => "hello!"
102
+            }
132 103
           }
133
-        }
134
-        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
135
-        site = {
136
-          'name' => "Some JSON Response",
137
-          'expected_update_period_in_days' => 2,
138
-          'type' => "json",
139
-          'url' => "http://json-site.com",
140
-          'mode' => 'on_change',
141
-          'extract' => {
142
-            :title => {'path' => "response.data[*].title"},
143
-            :version => {'path' => "response.data[*].version"}
104
+          stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
105
+          site = {
106
+            'name' => "Some JSON Response",
107
+            'expected_update_period_in_days' => 2,
108
+            'type' => "json",
109
+            'url' => "http://json-site.com",
110
+            'mode' => 'on_change',
111
+            'extract' => {
112
+              'version' => {'path' => "response.version"},
113
+              'title' => {'path' => "response.title"}
114
+            }
144 115
           }
145
-        }
146
-        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
147
-        checker.user = users(:bob)
148
-        checker.save!
116
+          checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
117
+          checker.user = users(:bob)
118
+          checker.save!
149 119
 
150
-        lambda {
151 120
           checker.check
152
-        }.should change { Event.count }.by(2)
153
-
154
-        event = Event.all[-1]
155
-        event.payload['version'].should == 2.5
156
-        event.payload['title'].should == "second"
121
+          event = Event.last
122
+          event.payload['version'].should == 2
123
+          event.payload['title'].should == "hello!"
124
+        end
125
+
126
+        it "can handle arrays" do
127
+          json = {
128
+            'response' => {
129
+              'data' => [
130
+                {'title' => "first", 'version' => 2},
131
+                {'title' => "second", 'version' => 2.5}
132
+              ]
133
+            }
134
+          }
135
+          stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
136
+          site = {
137
+            'name' => "Some JSON Response",
138
+            'expected_update_period_in_days' => 2,
139
+            'type' => "json",
140
+            'url' => "http://json-site.com",
141
+            'mode' => 'on_change',
142
+            'extract' => {
143
+              :title => {'path' => "response.data[*].title"},
144
+              :version => {'path' => "response.data[*].version"}
145
+            }
146
+          }
147
+          checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
148
+          checker.user = users(:bob)
149
+          checker.save!
150
+
151
+          lambda {
152
+            checker.check
153
+          }.should change { Event.count }.by(2)
154
+
155
+          event = Event.all[-1]
156
+          event.payload['version'].should == 2.5
157
+          event.payload['title'].should == "second"
158
+
159
+          event = Event.all[-2]
160
+          event.payload['version'].should == 2
161
+          event.payload['title'].should == "first"
162
+        end
163
+
164
+        it "stores the whole object if :extract is not specified" do
165
+          json = {
166
+            'response' => {
167
+              'version' => 2,
168
+              'title' => "hello!"
169
+            }
170
+          }
171
+          stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
172
+          site = {
173
+            'name' => "Some JSON Response",
174
+            'expected_update_period_in_days' => 2,
175
+            'type' => "json",
176
+            'url' => "http://json-site.com",
177
+            'mode' => 'on_change'
178
+          }
179
+          checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
180
+          checker.user = users(:bob)
181
+          checker.save!
157 182
 
158
-        event = Event.all[-2]
159
-        event.payload['version'].should == 2
160
-        event.payload['title'].should == "first"
183
+          checker.check
184
+          event = Event.last
185
+          event.payload['response']['version'].should == 2
186
+          event.payload['response']['title'].should == "hello!"
187
+        end
161 188
       end
189
+    end
190
+  end
162 191
 
163
-      it "stores the whole object if :extract is not specified" do
164
-        json = {
165
-          'response' => {
166
-            'version' => 2,
167
-            'title' => "hello!"
168
-          }
169
-        }
170
-        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
171
-        site = {
172
-          'name' => "Some JSON Response",
173
-          'expected_update_period_in_days' => 2,
174
-          'type' => "json",
175
-          'url' => "http://json-site.com",
176
-          'mode' => 'on_change'
177
-        }
178
-        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
179
-        checker.user = users(:bob)
180
-        checker.save!
192
+  describe "checking with http basic auth" do
193
+    before do
194
+      stub_request(:any, /user:pass/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
195
+      @site = {
196
+        'name' => "XKCD",
197
+        'expected_update_period_in_days' => 2,
198
+        'type' => "html",
199
+        'url' => "http://www.example.com",
200
+        'mode' => 'on_change',
201
+        'extract' => {
202
+          'url' => {'css' => "#comic img", 'attr' => "src"},
203
+          'title' => {'css' => "#comic img", 'attr' => "title"}
204
+        },
205
+        'basic_auth' => "user:pass"
206
+      }
207
+      @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @site)
208
+      @checker.user = users(:bob)
209
+      @checker.save!
210
+    end
181 211
 
182
-        checker.check
183
-        event = Event.last
184
-        event.payload['response']['version'].should == 2
185
-        event.payload['response']['title'].should == "hello!"
212
+    describe "#check" do
213
+      it "should check for changes" do
214
+        lambda { @checker.check }.should change { Event.count }.by(1)
215
+        lambda { @checker.check }.should_not change { Event.count }
186 216
       end
187 217
     end
188 218
   end
219
+
189 220
 end